clear all

* cd

insheet using tv_experiment_sm_data.csv, clear

/*
Our pre-analysis plan did not specify adjusting p-values for multiple testing. However, an anonymous reviewer suggested we assess whether the positive persuasive effects we observe of the LGBT ad while the advertisement was still running was a false positive.
*/

keep if t1_respondent == 1

local t0_covars  t0_factor_immpolicy t0_factor_immprej ///
	t0_factor_asyl_policy t0_factor_asyl_prej ///
	t0_factor_lgbtpolicy t0_factor_lgbtprej ///
	t0_therm_trans t0_therm_gay t0_therm_immigrant t0_therm_asylum ///
	baseline_factor_general_politics ///
	t0_economy_today t0_economy_yearfromnow t0_economy_personal ///
	t0_knowledge_imm_taxes t0_college_educ t0_ideology t0_pid7 ///
	t0_trump_2020 t0_congress_2020 ///
	t0_employ_fulltime t0_employ_parttime t0_live_city
local vf_covars vf_age vf_general18 vf_general16 vf_general14 vf_general12 ///
	vf_dem vf_rep vf_female vf_white vf_afam vf_hispanic i.state_enc
	
wyoung t1_factor_lgbtprej t1_factor_lgbtpolicy t1_factor_lgbtall t1_recall_lgbt ///
	t1_action_lgbt_online t1_action_lgbt_congress, ///
	cmd(regress OUTCOMEVAR i.survey_timing i.survey_timing#i.treat_lgbt  ///
	`t0_covars' `vf_covars', cluster(hh_id)) bootstraps(10000) seed(20) /// change to 10000
	familyp(1.treat_lgbt#1.survey_timing) cluster(hh_id)
// Seed (20) based on Example 1 of the help wyoung file
// Bootstraps: per help file: "Westfall and Young (1993) recommend using at least 10,000 bootstraps."

local imm_dvs t1_factor_immprej t1_factor_immpolicy t1_factor_immall ///
	t1_recall_imm t1_knowledge_imm_taxes t1_action_imm_online
local lgbt_dvs t1_recall_lgbt t1_factor_lgbtprej t1_factor_lgbtpolicy ///
	t1_factor_lgbtall t1_action_lgbt_congress t1_action_lgbt_online

tempname anderson_q_value 
postfile `anderson_q_value' str30 outcome timing treatment treat_coef treat_se treat_pvalue ///
	using anderson_q_value.dta, replace
foreach dv of varlist `imm_dvs' {
	foreach treat of numlist 1 2 {
	qui reg `dv' i.treat `t0_covars' `vf_covars' if inlist(treat, 0, `treat'), cluster(hh_id) // main tests
	local outcome = "`dv'"
	local treat = `treat'
	local b = _b[`treat'.treat]
	local se = _se[`treat'.treat]
	disp `b' `se'
	qui test `treat'.treat
	local p = r(p)
	disp `p' 
	post `anderson_q_value' ("`outcome'") (0) (`treat') (`b') (`se') (`p')
	}
}

foreach dv of varlist `imm_dvs' {
	foreach treat of numlist 1 2 {
		foreach timing of numlist 1 2 3 {
		qui reg `dv' i.treat `t0_covars' `vf_covars' if survey_timing == `timing' ///
			& inlist(treat, 0, `treat'), cluster(hh_id) // main tests
		local outcome = "`dv'"
		local treat = `treat'
		local b = _b[`treat'.treat]
		local se = _se[`treat'.treat]
		disp `b' `se'
		qui test `treat'.treat
		local p = r(p)
		disp `p' 
		post `anderson_q_value' ("`outcome'") (`timing') (`treat') (`b') (`se') (`p')
		}
	}
}

foreach dv of varlist `lgbt_dvs' {
	qui reg `dv' i.treat_lgbt `t0_covars' `vf_covars', cluster(hh_id) // main tests
	local outcome = "`dv'"
	local treat = 0
	local b = _b[1.treat_lgbt]
	local se = _se[1.treat_lgbt]
	disp `b' `se'
	qui test 1.treat_lgbt
	local p = r(p)
	disp `p' 
	post `anderson_q_value' ("`outcome'") (0) (`treat') (`b') (`se') (`p')
}

foreach dv of varlist `lgbt_dvs' {
	foreach timing of numlist 1 2 3 {
	qui reg `dv' i.treat_lgbt `t0_covars' `vf_covars' if survey_timing == `timing', cluster(hh_id) // main tests
	local outcome = "`dv'"
	local treat = 0
	local b = _b[1.treat_lgbt]
	local se = _se[1.treat_lgbt]
	disp `b' `se'
	qui test 1.treat_lgbt
	local p = r(p)
	disp `p' 
	post `anderson_q_value' ("`outcome'") (`timing') (`treat') (`b') (`se') (`p')
	}
}

postclose `anderson_q_value' 

use "anderson_q_value.dta", clear

// Drop the Eddie treatment for the knowledge DV: not a relevant p-value given
// Eddie treatment didn't discuss this fact
drop if outcome == "t1_knowledge_imm_taxes" & treatment == 1

rename treat_pvalue pval

* This code generates BKY (2006) sharpened two-stage q-values as described in Anderson (2008), "Multiple Inference and Gender Differences in the Effects of Early Intervention: A Reevaluation of the Abecedarian, Perry Preschool, and Early Training Projects", Journal of the American Statistical Association, 103(484), 1481-1495

* BKY (2006) sharpened two-stage q-values are introduced in Benjamini, Krieger, and Yekutieli (2006), "Adaptive Linear Step-up Procedures that Control the False Discovery Rate", Biometrika, 93(3), 491-507

* Last modified: M. Anderson, 11/20/07
* Test Platform: Stata/MP 10.0 for Macintosh (Intel 32-bit), Mac OS X 10.5.1
* Should be compatible with Stata 10 or greater on all platforms
* Likely compatible with with Stata 9 or earlier on all platforms (remove "version 10" line below)

version 10

****  INSTRUCTIONS:
****    Please start with a clear data set
****	When prompted, paste the vector of p-values you are testing into the "pval" variable
****	Please use the "do" button rather than the "run" button to run this file (if you use "run", you will miss the instructions at the prompts)

pause on
set more off

/*
if _N>0 {
	display "Please clear data set before proceeding"
	display "After clearing, type 'q' to resume"
	pause
	}	

quietly gen float pval = .

display "***********************************"
display "Please paste the vector of p-values that you wish to test into the variable 'pval'"
display	"After pasting, type 'q' to resume"
display "***********************************"

pause
*/

* Collect the total number of p-values tested

quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Set the initial counter to 1 

local qval = 1

* Generate the variable that will contain the BKY (2006) sharpened q-values

gen bky06_qval = 1 if pval~=.

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc.  The loop ends by checking which hypotheses are rejected at q = 0.001.


while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	* Generate value q'*r/M
	gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q'*r/M
	gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank1 = reject_temp1*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	* Generate value q_2st*r/M
	gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	* Generate variable containing p-value ranks for all p-values that meet above condition
	gen reject_rank2 = reject_temp2*rank
	* Record the rank of the largest p-value that meets above condition
	egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}
	

quietly sort original_sorting_order
pause off
set more on

display "Code has completed."
display "Benjamini Krieger Yekutieli (2006) sharpened q-vals are in variable 'bky06_qval'"
display	"Sorting order is the same as the original vector of p-values"


* Note: Sharpened FDR q-vals can be LESS than unadjusted p-vals when many hypotheses are rejected, because if you have many true rejections, then you can tolerate several false rejections too (this effectively just happens for p-vals that are so large that you are not going to reject them regardless).

list if strpos(outcome, "lgbt") != 0 & timing == 1
list if outcome == "t1_factor_immall" & timing == 0 & treatment == 2
